*****
*** Authors: Peter Eppinger (PE), Hong Ma (HM)
*** Purpose: Prepare data for the analysis
*** Stata version: 16.1
*** Date: February 20, 2024
***
***** 1. Header
***** 2. Preparation of non-firm data
***** 3. Merge of ASIP with other data
***** 4. Construction of key variables
***** 5. TFP index (implemented below as subsection 4.4)
***** 6. Finalizing
***
****************************************************************************************************************
***** 1. Header

* Start from an empty session, allow wide log lines, close any log that is still open,
* and open (overwrite) the log file of this data-preparation run
clear all
set linesize 255
capture log close
log using "$results\1_FDI_liberalization_dataprep.log", replace

****************************************************************************************************************
***** 2. Preparation of non-firm data

********************************************************
**** 2.1 Firm-level FDI policy

* Each policy file is loaded, sorted by firm and year, and written back in place:
*  - firm_level_policy9807_rv4: FDI policy by firm-product 1998-2007 (equity restrictions and broad policies res/frd/enc)
*  - policy_cutoff60, policy_cutoff70: FDI policy for alternative cutoffs of the similarity score (0.6 or 0.7)
foreach policy_file in "$working_data\policy_nlp\firm_level_policy9807_rv4" ///
		"$working_data\policy_nlp\policy_cutoff60" ///
		"$working_data\policy_nlp\policy_cutoff70" {
	use "`policy_file'", clear
	sort firm_id year
	save, replace
}

********************************************************
**** 2.2 Industry-level FDI policy from the literature

** FDI policy measure from replication folder by Brandt et al (2017, AER)
* keep the industry-year FDI indicators and give them *_brandt names
use "$working_data\public_data\protection-measures.dta", clear
keep cic_adj year fdi_prohibited fdi_restricted any_fdi
rename fdi_restricted res_brandt
rename fdi_prohibited frd_brandt
rename any_fdi resfrd_brandt

* construct liberalization dummy (in 2002 vs before)
* the one-year lag is evaluated before the sample is trimmed to 1998-2007
sort cic_adj year
xtset cic_adj year
gen liberalized_brandt1=0 if resfrd_brandt==1 & L.resfrd_brandt==1
replace liberalized_brandt1=1 if resfrd_brandt==0 & L.resfrd_brandt==1
keep if year>=1998 & year<=2007
tab liberalized_brandt1 year,m
* keep only the transition observed in 2002 and spread it to all years of the industry
gen liberalized02_brandt=liberalized_brandt1 if year==2002
drop liberalized_brandt1
bys cic_adj: egen liberalized_brandt= max(liberalized02_brandt)
* industries with resfrd_brandt==0 in every remaining year form their own category (2)
bys cic_adj: egen ever_restr=max(resfrd_brandt)
replace liberalized_brandt=2 if ever_restr==0
* everything still unclassified goes into the residual category (99)
recode liberalized_brandt (.=99)
label def lib_brandt 0 "still restricted in 2002" 1 "liberalized in 2002" 2 "never restricted" 99 "other (e.g. newly restricted)"
label val liberalized_brandt lib_brandt
* variable labels hold at most 80 characters; the previous 87-character text lost its ending,
* so the label is shortened (and the "at el." typo corrected)
label var liberalized_brandt "Liberalized (1), still restricted (0), never restricted (2); Brandt et al. 2017"
drop liberalized02_brandt ever_restr
rename cic_adj cic_code
compress
save "$prepared_data\BrandtEtAl_FDI_policy.dta", replace

** FDI policy measure provided by Sheng and Yang (2016, JDE), not authorized to publish
* use "$prepared_data\Sheng_Yang_FDI_policy.dta", clear

  
********************************************************
**** 2.3 Product-level tariffs and trade policy uncertainty data

** Trade policy uncertainty (TPU) from Pierce and Schott (AER)
use "$working_data\public_data\tar_val.dta", clear

* use hs6 code after 1995
keep if year > 1995 
gen HS96 = substr(hs8,1,6)
* simple average of the NTR and non-NTR rates within each 6-digit code and year
collapse (mean) ntr_rate nonntr_rate , by(HS96 year)
gen gap1 = (nonntr_rate - ntr_rate)
destring HS96, replace
* keep the first year in which each HS6 code is observed; the within-group order is stated
* explicitly with (year) instead of relying on a preceding sort surviving the by-group sort
bysort HS96 (year): keep if _n==1
* a missing gap is set to zero whenever the NTR rate itself is observed
replace gap1=0 if gap1==. & ntr_rate~=.
drop year nonntr_rate ntr_rate
label var gap1 "nonNTR-NTR, simple average"
sort HS96
compress
save  "$prepared_data\TPU_hs6_schott.dta", replace

*** Tariffs in 1998
use "$working_data\public_data\CHNtariff1998_2007.dta", clear //downloaded from WITS
keep if year==1998
rename AHS_simple AHS_simple98
rename hs6 HS96
drop year BND_simple importsvaluein1000usd
compress
save  "$prepared_data\CHNtariff1998.dta", replace

*** Product-HS correspondence for NBS firms
* the file is written back unchanged right after loading
use "$working_data\match_firm_HS6\firm_NBS_HS_match2000_03.dta", clear
save, replace

*** tariffs in 1998
merge m:1 HS96 using "$prepared_data\CHNtariff1998.dta"
* NOTE(review): -unique- is not an official Stata command; presumably the SSC package - verify it is installed
unique HS96 if _merge==1   // no match for 3 HS codes, 2347 obs are cases with missing HS codes
tab HS96 if _merge==1,m
drop if _merge==2
drop _merge

*** trade policy uncertainty from Pierce & Schott (AER) for the first year in which we observe this HS code after the year 1995, when the 1996 classification was used:
merge m:1 HS96 using "$prepared_data\TPU_hs6_schott.dta"
drop if _merge==2
drop _merge
compress
save "$prepared_data\tariffs_TPU_firm_product2000_03.dta", replace

* erase intermediate working datasets
erase "$prepared_data\TPU_hs6_schott.dta"
erase "$prepared_data\CHNtariff1998.dta"


********************************************************
**** 2.4 NBER CES
* data source: NBER-CES Manufacturing Productivity Database
* Bartelsman, E and W. Gray. 1996. "The NBER Manufacturing Productivity Database." Technical Working Paper 205, National Bureau of Economic Research.
* at SIC 1987 level
* we use crosswalks to match it to the CIC industry level

* Step 1: factor intensities by SIC industry, stored in a temporary file
use "$working_data/public_data/sic5809.dta", clear
keep if year ==1990 // US K/L in 1990 may be closer to the case of China K/L in 2000
* - capital intensity: capital stock over payroll
generate KwL_nber=cap/pay
* - skill intensity: one minus the ratio of production-worker wages to payroll
generate wSL_nber=1-prodw/pay
keep sic year wSL_nber KwL_nber
* the year becomes a suffix of the variable names (…1990)
reshape wide wSL_nber KwL_nber, i(sic) j(year)  
sort sic
tempfile sic_kl
save `sic_kl', replace

* Step 2: construct CIC level KL ratio
* note cic_adj is the CIC industry code consistent with cic02 and cic03, provided by Brandt et al. (2012 JDE).
use "$working_data/public_data/crosswalk/CIC_SIC1987_concord.dta", clear
drop sicindustry
rename sic1987 sic
sort sic
merge m:1 sic using `sic_kl'
keep if _merge!=2
drop _merge
* average over all SIC industries mapped to the same CIC industry
collapse (mean) K* wS*, by(cic_adj)
rename cic_adj cic_code
label variable KwL_nber1990 "K/wL ratio (capital stock/ payroll) based on NBER-CES, in 1990"
label variable wSL_nber1990 "wS/L ratio (share of non-production worker wages) based on NBER-CES, in 1990"
save "$prepared_data\CIC_US_NBER_CES.dta", replace

********************************************************
**** 2.5 External financial dependence and R&D intensity of US industries from Kroszner, Laeven, and Klingebiel (2007)

* match using crosswalks b/w cic_adj code to isic-rv3 and then to isic-rv2

* Step 1: Kroszner et al data at ISIC2 level, stored in a temporary file
use "$working_data/public_data/credit_kroszner.dta", clear
sort isic2
tempfile credit
save `credit', replace

* Step 2: crosswalk between ISIC2 and ISIC3
use "$working_data/public_data/crosswalk/isic_rv3_rv2.dta", clear
keep isic3 isic2
duplicates drop
tostring isic2, replace
* all ISIC2 codes are cut to their first three characters, except the nine codes listed here
replace isic2=substr(isic2,1,3) if isic2!="3211" & isic2!="3411" & isic2!="3511" ///
	& isic2!="3513" & isic2!="3522" & isic2!="3825" ///
	& isic2!="3832" & isic2!="3841" & isic2!="3843"
duplicates drop
sort isic2
merge m:1 isic2 using `credit'
tabulate _merge
keep if _merge==3
* diagnostic only: number of ISIC2 rows behind each ISIC3 code
bysort isic3: generate y=_N
tabulate y
drop _merge y
* average the measures over all ISIC2 codes mapped to the same ISIC3 code
collapse (mean) exf* rd_klk, by(isic3)
sort isic3
tempfile isic3_code
save `isic3_code', replace

* Step 3: concordance from CIC industries to ISIC rev. 3
use "$working_data/public_data/crosswalk/cic_isic_concordance.dta", clear
compress
count
rename isic_rv3 isic3
sort isic3
merge m:1 isic3 using `isic3_code'
tabulate _merge
keep if _merge!=2
drop _merge
sort cic_code
// two missing industries could be imputed
replace exf_klk=-0.15 if cic_code==1320
replace exf_klk=-0.01 if cic_code==2631
replace rd_klk=0.01 if cic_code==1320
replace rd_klk=0.02 if cic_code==2631 

label variable exf_klk "External financial dependence for US firms by ISIC from Kroszner et al (2007)"
label variable rd_klk "R&D intensity for US firms by ISIC from Kroszner et al (2007)"

save "$prepared_data\cic_external_finance.dta", replace


********************************************************
**** 2.6 Deflators from Brandt et al 2012

* Reshape the input deflator and then the output deflator to long format (one row per cic_code and year)
local sides input output
local stubs InputDefl OutputDefl
forvalues k=1/2 {
	local side : word `k' of `sides'
	local stub : word `k' of `stubs'
	use "$working_data\public_data\benchmark_`side'_deflator.dta", clear
	rename cic_adj cic_code
	reshape long `stub', i(cic_code) j(year)
	save "$prepared_data\benchmark_`side'_defl_long.dta", replace
}



****************************************************************************************************************
***** 3. Merge ASIP with other data

* cleaned manufacturing sample from the Annual Surveys of Above-scale Industrial Firms, 1998-2007 // from FDI_project\data\
use "$proprietary_data\data9807_manuf1.dta", clear

keep if firm_id!=.

* industry identifiers are converted to numeric for the merges below
destring cic_code industry_code IO2002, replace

** add deflators (output first, then input)
* diagnostics used earlier for the output deflator:
*tab cic_code if _merge==1 								// waste industry, OK
*tab cic_code if _merge==2
foreach side in output input {
	merge m:1 cic_code year using "$prepared_data\benchmark_`side'_defl_long.dta"
	keep if _merge!=2
	drop _merge
}
rename (OutputDefl InputDefl) (output_defl input_defl)

** add CPI from WDI
merge m:1 year using "$working_data\public_data\wdi_prices.dta", keepusing(cpi98)
keep if _merge!=2
drop _merge

* imports of capital goods and import shares
merge 1:1 firm_code year using "$proprietary_data\firm_import_data_red.dta", keepusing(v_imp v_Kgood s_imp) // from FDI_project\data\China_Customs_Data
keep if _merge!=2
drop _merge
recode s_imp (.=0)  //assuming zero imports if none recorded in customs data
generate imp_share=s_imp
replace imp_share=1 if s_imp>1   //issue with import share: winsorize at 1 (approx. 99th percentile)
rename (v_imp v_Kgood) (import import_K)
drop s_imp

** add firm-level FDI policy data:
* here the unmatched ASIP rows (_merge==1) are removed; rows found only in the policy file are kept
merge m:1 firm_id year using "$working_data\policy_nlp\firm_level_policy9807_rv4.dta"
tabulate cic_code if _merge==1 // mostly in industry cic_code==3663 (weaponry or military purpose)
drop if _merge==1 
drop _merge

** add industry-level FDI policy measures from the literature
merge m:1 cic_code year using "$prepared_data\BrandtEtAl_FDI_policy.dta" 
keep if _merge!=2
drop _merge

merge m:1 cic_code year using "$proprietary_data\Sheng_Yang_FDI_policy.dta" // from FDI_project\data\ originally provided by Liugang Sheng
keep if _merge!=2
drop _merge

** add tariffs in 1998 and trade policy uncertainty in 1996 for products produced (mostly) in 2001:
merge m:1 firm_id using "$prepared_data\tariffs_TPU_firm_product2000_03.dta"
* firms found only in the product-HS file are discarded, as for every other data source merged in this section;
* keeping them would append rows that have no year and no ASIP information to the firm-year panel
drop if _merge==2
drop _merge
foreach x in product HS96 {
    rename `x' `x'_2001 // year is mostly 2001, actual year is in fact HSproduct_year
}
rename gap1 TPU_1996

** add capital and skill intensity, based on NBER CES database by industry in 1990
merge m:1 cic_code using  "$prepared_data\CIC_US_NBER_CES.dta", keepusing(wSL_nber1990 KwL_nber1990)
keep if _merge!=2
drop _merge

** add contractability from Feenstra-Hong-Ma-Spencer (2013, JEBO), based on Nunn (2007, QJE) and Rauch's (1999, JIE) classification by 6-digit HS and concordance of HS to IO sectors
* z_lib2: Nunn contract intensity, liberal, fraction of differentiated+referenced
merge m:1 IO2002 using "$working_data\public_data\io2002_nunnINDEX.dta", keepusing(z_lib2)
keep if _merge!=2
drop _merge
rename z_lib2 nunn_lib2

** add external financial dependence a la Rajan/Zingales and R&D intensity from Kroszner et al (2007)
merge m:1 cic_code using "$prepared_data\cic_external_finance.dta", keepusing(exf_klk rd_klk)
keep if _merge!=2
drop _merge


****************************************************************************************************************
***** 4. Construction of key variables

********************************************************
**** 4.1 Firm characteristics

* ownership: foreign equity share and indicator for a share of exactly one
generate foreign_share=foreign_capital/paidin_capital
generate foreign_full=(foreign_share==1)

* shares/ ratios
generate statecoll_share=(state_capital +col_capital)/paidin_capital
generate liq_ratio= (current_asset -current_liability)/totalasset // liquidity ratio: (current assets- current liabilities)/total assets
generate lev_ratio= total_liability/totalasset // leverage ratio: "ratio of total liabilities to total assets"
generate profit_ratio= profit/gross_output
generate exp_share=export/output_sales_value
generate newprod_share=reve_new_product/gross_output

* remove implausible values: shares below zero or above one are set to missing
foreach share in foreign_share exp_share newprod_share {
	replace `share'=. if `share'<0
	replace `share'=. if `share'>1 & `share'<.
}

* firm age, measured from the earliest start year reported by the firm
bysort firm_id: egen first_start_year=min(start_year)
generate age=year-first_start_year
drop first_start_year

* deflate variables (deflators enter divided by 100)
generate real_capital=fa_net/input_defl*100
generate real_output=gross_output/output_defl*100
generate real_input=input/input_defl*100
generate real_wage=wage/cpi98*100
generate wage_empl=real_wage/employee
generate real_import= import/input_defl*100
generate real_kimport=import_K/input_defl*100

* log variables
* NOTE(review): lcap_empl and lcapital are built from real_cap, not from real_capital generated above;
* real_cap is labelled later as the perpetual-inventory capital stock, so it presumably comes with the source data - confirm
generate loutput=ln(real_output)
generate lwage_empl =ln(wage_empl)
generate lempl=ln(employee)
generate lcap_empl=ln(real_cap/employee)
generate lcapital=ln(real_cap)
generate limport=ln(real_import)
generate lkimport=ln(real_kimport)

********************************************************
**** 4.2 Definition of liberalization and treatment dummies

* conditions shared by the definitions below (the 97/02 suffixes follow the variable names)
local restricted97 "(policy97<2 | no_wfie97==1 | re_major97==1 | major97==1)"
local restricted02 "(policy02<2 | no_wfie02==1 | re_major02==1 | major02==1)"
local unrestricted02 "(policy02==2 | policy02==3) & (no_wfie02==0 & re_major02==0 & major02==0)"

** For Table 1: Liberalized vs. still restricted firms
* zero if restricted in 2001 and same product will not be liberalized
generate lib1=0 if year==2001 & `restricted97' & `restricted02'
* one if restricted in 2001 and same product fully liberalized in 2002 (no restriction left)
replace lib1=1 if year==2001 & `restricted97' & `unrestricted02'
* the 2001 classification is carried to all years of the firm
bysort firm_id: egen lib1_full01=max(lib1)
drop lib1

** For main analysis (Figure 3 and Tables 2-4): Liberalized vs. never regulated firms
* define never regulated firms:
* - highest and lowest policy code across both policy variables, per observation and then per firm
egen maxpolicy=rowmax(policy97 policy02)
egen minpolicy=rowmin(policy97 policy02)
bysort firm_id: egen maxreg=max(maxpolicy)
bysort firm_id: egen minreg=min(minpolicy)
tabulate maxreg minreg
* - any equity restriction flag, per observation and then per firm
egen maxeqres=rowmax(no_wfie97 re_major97 major97 no_wfie02 re_major02 major02)
bysort firm_id: egen maxequity=max(maxeqres)
summarize maxpolicy minpolicy maxreg minreg maxeqres maxequity
* - never regulated: policy code equal to 2 throughout and no equity restriction flag ever set
generate neverreg=(minreg==2 & maxreg==2 & maxequity==0)

* Treated = liberalized:
generate treated=1 if lib1_full01==1
* Control = never regulated:
replace treated=0 if neverreg==1
* old code (v5): libfull01_vs_neverreg: lib1_full01 vs. neverreg

label variable lib1_full01 "FDI liberalized in 2002 vs. still restricted firms (based on 2001 products)"
label variable neverreg "Firm never subject to any FDI policy in 1998-2007"
label variable treated "FDI liberalized in 2002 (based on 2001 products) vs. never regulated firms"

drop maxpolicy minpolicy maxreg minreg maxeqres maxequity

********************************************************
**** 4.3 Definition of full foreign acquisition

* initially not (fully) foreign-owned firms: highest foreign share before 2002, copied to all years of the firm
bys firm_id: egen max_FO_before02=max(foreign_share) if year<2002
bys firm_id: egen max_FO_before2002=max(max_FO_before02)
drop max_FO_before02
* max FO share since 2002, copied to all years of the firm
bys firm_id: egen max_FO_since02=max(foreign_share) if year>=2002
bys firm_id: egen max_FO_since2002=max(max_FO_since02)
drop max_FO_since02

* identify whether firm experienced dropping of FDI restrictions in 2002
* (indicators start at zero wherever policy02 is observed)
cap drop idrop*
gen idrop_frd=0 if policy02<.
gen idrop_majreq=0 if policy02<.
gen idrop_JVreq=0  if policy02<.
gen idrop_res=0  if policy02<.
gen inew_enc=0  if policy02<.
sort firm_id year
* for each year y>=2002, the lag of length y-2001 picks up the firm's 2001 value of the *97 variables
* NOTE(review): the L. operators need the data to be xtset by firm_id and year; this is presumably
* stored in the loaded dataset - confirm
forvalues y=2002/2007{
	local a=`y'-2001
	replace idrop_frd=1 if year==`y' & L`a'.policy97==0 & policy02>0 & policy02<.
	replace idrop_majreq=1 if year==`y' & (L`a'.major97==1 | L`a'.re_major97==1) & (major02==0  & re_major02==0)
	replace idrop_JVreq=1 if year==`y' & L`a'.no_wfie97==1 & no_wfie02==0
	replace idrop_res=1 if year==`y' & L`a'.policy97==1 & policy02>1 & policy02<.
	replace inew_enc=1 if year==`y' & L`a'.policy97<3 & policy02==3
}
* firm-level versions: one if the event is recorded in any year of the firm
bys firm_id: egen Cdrop_frd= max(idrop_frd)
bys firm_id: egen Cdrop_majreq= max(idrop_majreq)
bys firm_id: egen Cdrop_JVreq= max(idrop_JVreq)
bys firm_id: egen Cdrop_res= max(idrop_res)
bys firm_id: egen Cnew_enc= max(inew_enc)

* main acquisition definition: acquisition by 100% (indep. of thresholds) vs. stay below 100%
gen treat_99init_vs99 =0 if max_FO_before2002<1 & max_FO_since2002<1
replace treat_99init_vs99 =1 if max_FO_before2002<1 & year>=2002 & foreign_share==1
bys firm_id: egen acq100=max(treat_99init_vs99)
drop treat_99init_vs99

* alternative acquisition definition: acquisition across threshold or to 100% vs. stay below threshold (or below 1 for general/ no restrictions)
gen acq100_EquityThresh=0 if Cdrop_frd==1 & max_FO_before2002==0 & max_FO_since2002==0
replace acq100_EquityThresh =0 if Cdrop_JVreq==1 & max_FO_before2002<1 & max_FO_since2002<1
replace acq100_EquityThresh =0 if Cdrop_majreq==1 & max_FO_before2002<0.5 & max_FO_since2002<0.5
replace acq100_EquityThresh =0 if Cdrop_res==1 & max_FO_before2002<1 & max_FO_since2002<1 // some dropped majority req. and general restr. => zeros may be sensitive to the order of coding!
replace acq100_EquityThresh =0 if neverreg==1 & max_FO_before2002<1 & max_FO_since2002<1
* missing values compare as larger than any number, so the two lower-bound tests below also require
* foreign_share<. ; otherwise firm-years with a missing foreign share would be coded as acquisitions
replace acq100_EquityThresh =1 if idrop_frd==1 & max_FO_before2002==0 & year>=2002 & foreign_share>0 & foreign_share<.
replace acq100_EquityThresh =1 if idrop_JVreq==1 & max_FO_before2002<1 & year>=2002 & foreign_share==1
replace acq100_EquityThresh =1 if idrop_majreq==1 & max_FO_before2002<0.5 & year>=2002 & foreign_share>=0.5 & foreign_share<.
replace acq100_EquityThresh =1 if idrop_res==1 & max_FO_before2002<1 & year>=2002 & foreign_share==1
replace acq100_EquityThresh =1 if neverreg==1 & max_FO_before2002<1 & year>=2002 & foreign_share==1
bys firm_id: egen acq100_Equity=max(acq100_EquityThresh)
drop acq100_EquityThresh

drop idrop_* Cdrop_* inew_enc Cnew_enc  max_FO_before2002 max_FO_since2002 

****************************************************************************************************************
**** 4.4 TFP index

* Törnqvist index based on Caves, Christensen, and Diewert (1982, EconJ): equation (36)
* and Aw, Chen, and Roberts (2001, JDevEcon): equation (1)
* across all firms, relative to a single reference firm (not within industry)
* NOTE(review): the D. and L. operators used below need the data to be xtset by firm_id and year;
* this is presumably stored in the loaded dataset - confirm

** wage bill: share in gross output, values outside [0,1] set to missing
gen labor_share=wage/gross_output
sum labor_share,d
replace labor_share=. if labor_share<0 | labor_share>1

** material cost: share in gross output, values outside [0,1] set to missing
gen material_share=input/gross_output
sum material_share,d
replace material_share=. if material_share<0 | material_share>1

** capital_share: residual share, values outside [0,1] set to missing
gen capital_share=1-labor_share-material_share
sum capital_share,d
replace capital_share=. if capital_share<0 | capital_share>1

gen llabor=lempl
gen lmaterial=ln(real_input)

** variables: 
sum loutput llabor lcapital lmaterial labor_share capital_share material_share

** (i) generate yearly averages [Xbar(t)] of output, inputs, and input shares (for hypothetical reference firm)
**     the averages are taken by year over all firms, not by industry
foreach var of varlist loutput llabor lcapital lmaterial labor_share capital_share material_share {
	bys year: egen mean_`var'=mean(`var')
}

** (ii) generate differences of mean variables [DX(t)=Xbar(t)-Xbar(t-1)] over time (for chain-linking)
**      the difference is taken within firms and then replaced by its median within the year
sort firm_id year
foreach var of varlist mean_loutput mean_llabor mean_lcapital mean_lmaterial {
	sort firm_id year
	gen D1`var'= D.`var'
	bys year: egen D`var'=median(D1`var')
	drop D1`var'
}

** (iii) generate lags of mean input shares (same within-firm lag and within-year median as in (ii))
foreach var of varlist mean_labor_share mean_capital_share mean_material_share {
	sort firm_id year
	gen L1`var'= L.`var'
	bys year: egen L`var'=median(L1`var')
	drop L1`var'
}

** (iv) generate input weights [WX(t)=0.5(SX(t)+SXbar(t))] and lagged mean input weights [WXbar(t)=0.5(SXbar(t)+SXbar(t-1))]
foreach input in labor capital material {
	gen `input'_weight=0.5*(`input'_share+mean_`input'_share)
	gen Lmean_`input'_weight=0.5*(mean_`input'_share+Lmean_`input'_share)
}

** (v) sum over changes in all previous years [SUMX(t)=sum(s<=t)WXbar(s)*DX(s)] in average output and inputs
* output
cap drop count
* count = number of observations with a non-missing year per firm
bys firm_id: egen count=count(year)
tab count
* computed by firm because the panel is defined by firms, although the yearly means are the same for all firms in a year
* issue: chain-linking cannot be computed at the firm level because some firms are not observed in gap years, 
* 		  which would lead to different chain-linked sums in later years for different firms
bys firm_id (year): gen chain_diff_output_help= sum(Dmean_loutput) if count==10 // compute chain-linked sums only for firms that are always observed
* the running sum of the always-observed firms is assigned to every observation of the year
bys year: egen chain_diff_output= median(chain_diff_output_help)
sort firm_id year
sum Dmean_loutput chain_diff_output chain_diff_output_help
drop chain_diff_output_help

* same procedure for inputs, but weighted!
* for each year y: total of weight*change over the years up to y (always-observed firms only),
* then the median of these totals among the observations of year y is assigned to that year
foreach input in labor capital material {
	gen chain_diff_`input'=.
	forvalues y=1999/2007 {  // new!
		bys firm_id: egen chain_diff_`input'`y'= total(Lmean_`input'_weight*Dmean_l`input') if year<=`y' & count==10 //only for firms that are always observed
		egen chain_diff_`input'h`y'= median(chain_diff_`input'`y') if year==`y'
		replace chain_diff_`input'=chain_diff_`input'h`y' if year==`y'
		drop chain_diff_`input'`y' chain_diff_`input'h`y'
	}
}
sort firm_id year

* - in first year (1998): just output and inputs relative to mean
gen ltfp_index= (loutput-mean_loutput) - ( labor_weight*(llabor-mean_llabor) /*
*/ +capital_weight*(lcapital-mean_lcapital) +material_weight*(lmaterial-mean_lmaterial)) if year==1998
* - in later years: add chain linking
replace ltfp_index= (loutput-mean_loutput) +  chain_diff_output - ( labor_weight*(llabor-mean_llabor) /*
*/ +capital_weight*(lcapital-mean_lcapital) +material_weight*(lmaterial-mean_lmaterial) /*
*/ + chain_diff_labor + chain_diff_capital + chain_diff_material) if year>=1999

* remove all helper variables of the index construction
drop mean_loutput mean_llabor mean_lcapital mean_lmaterial mean_labor_share mean_capital_share mean_material_share
drop Dmean_loutput Dmean_llabor Dmean_lcapital Dmean_lmaterial
drop Lmean_labor_share Lmean_capital_share Lmean_material_share
drop labor_weight capital_weight material_weight
drop Lmean_labor_weight Lmean_capital_weight Lmean_material_weight
drop count chain_diff_output chain_diff_labor chain_diff_capital chain_diff_material

* remove the shares, the duplicated logs and the deflated level variables
drop labor_share material_share capital_share llabor lmaterial
drop real_capital real_output real_input real_wage real_import real_kimport

********************************************************
**** 4.5 Sector codes and time dummies

* 2-digit CIC codes: cic_code with its last two digits removed
generate cic2=floor(cic_code/100)

* after 2001 dummy: zero up to and including 2001, one afterwards
generate after01=(year>2001)

****************************************************************************************************************
***** 6. Finalizing

* labeling: identifiers, time and industry codes
label variable firm_id "firm ID"
label variable firm_code "firm code"
label variable year "year"
label variable after01 "Dummy equal to one in 2002-2007, zero before"
label variable cic_code "Adjusted industry classification based on CSIC"
label variable cic2 "2-digit CIC sector code"
label variable IO2002 "Industry code by 2002 US input-output classification"

* labeling: variables taken over from the firm survey
label variable foreign_capital "Foreign capital incl. Hong Kong, Macao and Taiwan"
label variable legal_person_capital "capital owned by legal person"
label variable private_capital "capital owned by private"
label variable hkt_capital "capital owned by Hongkong, Macao and Taiwan"
label variable for_capital "capital owned by foreigners"
label variable lterm_liability "total long term liability"
label variable total_liability "total liabilities"
label variable total_equity "total stockholders' equity"
label variable fin_expense "financial expenses"
label variable interest_paid "interest paid"
label variable export "value of goods exported"
label variable gross_output "Gross output" 
label variable employee "average number of annual employees"
label variable value_added "value added"
label variable region_code "region code  (6 digits, county level)"
label variable industry_code "four digit industry code"
label variable start_year "year of starting operation"
label variable status "operation status"
label variable regist_type "types of registration"
label variable state_share "dummy variable"
label variable inventory "inventory"
label variable current_asset "Current assets" 
label variable fa_original "original cost of fixed assets"
label variable a_dep "accumulated depreciation"
label variable c_dep "current year depreciation"
label variable fa_net "annual average net book value of fixed assets surplus"
label variable totalasset "total value of assets"
label variable current_liability "total current liability"
label variable sales_cost "total sales costs"
label variable sales_expense "total sales expenses"
label variable admin_expense "administrative expenses"
label variable paidin_capital "paid-in capital"
label variable state_capital "capital owned by state"
label variable col_capital "capital owned by collective"
label variable net_current_asset "annual average current assets surplus"
label variable long_invest "long-term investment"
label variable total_sales "total sales income"
label variable real_cap "Real capital stock, derived by perpetual inventory method"

* labeling: variables constructed or merged in this file
label variable foreign_share "Share of foreign capital"
label variable foreign_full "Dummy for full foreign ownership (100%)"
label variable output_defl "Output deflator (CIC 4-digit level)"
label variable input_defl "Input deflator (CIC 4-digit level)"
label variable import "Total value of imports"
label variable imp_share "Share of imported inputs"
label variable reve_new_product "Revenue in new products"
label variable newprod_share "Revenue share of new products"
label variable profit_ratio "Profits over sales (gross output)"
label variable wage_empl "Real wage per employee"
label variable loutput "Log of real output"
label variable lwage_empl "Log of real wage per employee"
label variable lempl "Log number of employees"
label variable lcap_empl "Log capital intensity (real capital stock per employee)"
label variable lcapital "Log of the real capital stock (perpetual inventory method)"
label variable limport "Log of real imports"
label variable lkimport "Log of real imports of capital goods (Burstein et al, 2013)"
label variable ltfp_index "Log of Törnqvist TFP index a la Caves et al/ Aw et al"
label variable statecoll_share "State or collectively owned share of equity (paid in capital)"
label variable exp_share "Exports over total sales value"
label variable age "Firm age"
label variable liq_ratio "Liquidity ratio: (current assets - current liabilities)/total assets"
label variable lev_ratio "Leverage ratio: total liabilities / total assets"
label variable frd_brandt "Forbidden FDI in this industry (Brandt et al, 2017, AER)"
label variable res_brandt "Restricted FDI in this industry (Brandt et al, 2017, AER)"
label variable resfrd_brandt "Restricted or forbidden FDI in this industry (Brandt et al, 2017, AER)"

* declare the firm-year panel and reduce storage types
sort firm_id year
xtset firm_id year
compress

* move the analysis variables to the front of the dataset
order firm_id firm_code year cic_code cic2 IO2002 start_year after01 age ///
	foreign_share foreign_full statecoll_share ///
	ltfp_index loutput lempl lcapital lcap_empl lwage_empl ///
	liq_ratio lev_ratio profit_ratio newprod_share exp_share imp_share lkimport ///
	policy97 no_wfie97 major97 re_major97 policy02 no_wfie02 major02 re_major02 policy05 ///
	treated lib1_full01 neverreg acq100 acq100_Equity ///
	TPU_1996 AHS_simple98 ///
	res_sheng_yang frd_sheng_yang enc_sheng_yang ///
	res_brandt frd_brandt resfrd_brandt liberalized_brandt ///
	wSL_nber1990 KwL_nber1990 nunn_lib2 exf_klk rd_klk

save "$prepared_data\comb_data_analysis.dta", replace


log close
